# -*- coding: utf-8 -*-
"""
The Social Cost of Fiscal Federalism and the
Depletion of China’s Native Forests
Haoyu Wang hywang@vt.edu
"""
import pandas as pd 
import numpy as np
import os
import statsmodels.api as sm
from stargazer.stargazer import Stargazer

# Change the following to the directory containing the CSV data files
# (sm_swl.csv and df.csv). Output .tex files are written to this directory
# too, because every later path in the script is relative.
path = "C:/Users/lenovo/Desktop/data"
os.chdir(path)

# Multiplier applied to monetary columns before rescaling
# (presumably a CNY -> USD exchange rate -- TODO confirm).
exchange_rate = 0.15

df = pd.read_csv('sm_swl.csv')

# Rescale the monetary columns on a copy so that `df` keeps the raw values
# for the merge and regressions further down.
# NOTE(review): "* 10000 / 1000000" looks like a conversion from units of
# 10,000 to millions -- confirm the source units.
dfs = df.copy()
for money_col in ('G', 'P'):
    dfs[money_col] = dfs[money_col] * exchange_rate * 10000 / 1000000

# Summarize the *converted* copy. The original described `df`, which left the
# rescaling above without any effect on the printed table.
summarys = dfs.describe().transpose()

# Keep count / mean / std / min / max only.
summarys.drop(['25%', '50%', '75%'], axis=1, inplace=True)

print(summarys)


# Load the second data file. G, P, tau and xbar are dropped before merging,
# presumably because sm_swl.csv already provides them -- TODO confirm.
dfall = pd.read_csv('df.csv').drop(['G','P','tau','xbar'],axis=1)


# Left merge: every row already in `df` is kept and receives the df.csv
# columns for the same (fbid, year) pair.
df = pd.merge(df, dfall, on=['fbid','year'], how='left')


# 'gamma_x' is presumably the left-hand copy of a column present in both
# frames (pandas suffixes overlapping names with _x / _y).
df = df.rename(columns={'gamma_x': 'gamma',
                        'Bdc_a4': 'natural forest',
                        })


# Deflated and rescaled monetary series.
# NOTE(review): the scale factors suggest the results are in millions after
# applying `exchange_rate` -- confirm the source units of swl and socialv.
df['swl_df']=df['swl']*df['deflator']*exchange_rate*10000/1000000

df['socialv_df']=df['socialv']*1000*df['deflator']*exchange_rate/1000000


# Label used to identify each unit in the output table: "<provname> <name>".
df['SFE']=df['provname']+" "+df['name']

# Keep rows whose age is missing or at most 140; larger values are dropped.
df = df[(df['age'].isna()) | (df['age'] <= 140)]

# Sanity check on the filter. The original left this as a bare expression,
# whose value is silently discarded when the file is run as a script.
print("rows with age >= 140 after filtering:", df[df['age'] >= 140].shape[0])

# Rows with a complete set of simulation outputs.
dfout = df[['SFE','year','x','gamma','xs','gammas','swl_df']].dropna()

print("min SWL ",dfout['swl_df'].min())
print("max SWL ",dfout['swl_df'].max())


def _as_percent(share):
    """Render a fraction as a percentage with a LaTeX-escaped percent sign."""
    # "\\%" is the two characters backslash + percent; the original "\%" is an
    # invalid escape sequence that newer Python versions warn about.
    return f"{share * 100:.2f}\\%"


def _as_whole_number(value):
    """Render a number rounded to an integer with thousands separators."""
    return f"{int(round(value)):,}"


# Format column by column from `dfout` itself. The original read the share
# columns from `df` and relied on index alignment, and used the deprecated
# DataFrame.applymap.
for share_col in ('gamma', 'gammas'):
    dfout[share_col] = dfout[share_col].map(_as_percent)
for level_col in ('x', 'xs', 'swl_df'):
    dfout[level_col] = dfout[level_col].map(_as_whole_number)

# Cells are already LaTeX-ready (they contain "\%"), so escaping is switched
# off explicitly instead of depending on the pandas-version default. With
# escaping off, underscores in the header (e.g. swl_df) must be escaped by
# hand or the table will not compile.
# NOTE(review): SFE values are emitted verbatim; escape them too if they can
# contain LaTeX special characters.
latex_table = dfout.rename(columns=lambda col: col.replace("_", "\\_"))

latex_sr = latex_table.to_latex(
    index=False,
    escape=False,
    float_format="%.2f",
    caption="Simulation results for SFEs (first column) in Northeast China",
    label="sr",
    # index=False: one "l" for the SFE column plus one "r" per remaining column.
    column_format="l" + "r" * (len(latex_table.columns) - 1),
)


# Save LaTeX code to a file (explicit encoding so non-ASCII names, if any,
# are written the same way on every platform).
with open("simulation result.tex", "w", encoding="utf-8") as f:
    f.write(latex_sr)
    

# Number of rows with a non-missing deflated social welfare loss.
tt = df["swl_df"].count()
print(tt)


checkcolumns = ['swl','pop','swl_df', 'socialv_df','year','xbar',"natural forest","age"]

# Count the NON-null values in the selected columns. The original computed
# this same quantity (len(df) minus the null count) but labelled it as the
# number of nulls.
non_null_counts = df[checkcolumns].notna().sum()

# Display the result
print("Non-null value counts in selected columns:")
print(non_null_counts)


# Map internal column names to the labels used in the regression and
# summary tables below.
display_names = {
    'swl_df': 'social welfare loss',
    'socialv_df': 'gross social production value',
    'pop': 'population',
    'xbar': 'harvest limit',
    'tenure_exp': 'manager tenure experience',
    'age': 'manager age',
    'edu': 'manager education level',
}
df = df.rename(columns=display_names)



# Regressor sets for each specification; the dependent variable is always
# "social welfare loss".
models = {
    "Model 1": ["gross social production value","population","natural forest"],
    "Model 2": ["gross social production value","population","year"],
    "Model 3": ["gross social production value","population","harvest limit"],
    "Model 4": ["gross social production value","population",'manager education level',"manager age","manager tenure experience"],
    "Model 5": ["gross social production value","population",'manager education level',"manager age","manager tenure experience","natural forest"]
}

# Run regressions and store results (one fitted model per entry of `models`,
# in the same order).
results = []
outcome = "social welfare loss"

for model_name, variables in models.items():
    # Estimation sample: only the columns this model needs, with infinite
    # values treated as missing and incomplete rows dropped.
    model_df = (
        df[[outcome] + variables]
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )

    # The original re-checked for nulls right after dropna(), which can never
    # trigger. The failure that can actually happen is an empty sample, so
    # report that clearly instead of letting the estimator fail obscurely.
    if model_df.empty:
        raise ValueError(
            f"{model_name}: no complete observations for {[outcome] + variables}"
        )

    # Independent variables with an intercept, and the dependent variable.
    X = sm.add_constant(model_df[variables])
    y = model_df[outcome]

    # Fit the OLS regression model
    model = sm.OLS(y, X).fit()
    print(f"=== {model_name} ===")
    print(model.summary())
    results.append(model)

# Generate LaTeX table with Stargazer
stargazer = Stargazer(results)
stargazer.title("Regression Results")
# One column label per specification, taken from the keys of `models` so the
# header cannot drift out of sync when a model is added or removed. Each
# label spans exactly one results column.
column_labels = list(models)
stargazer.custom_columns(column_labels, [1] * len(column_labels))
stargazer.significance_levels([0.1, 0.05, 0.01])
stargazer.significant_digits(5)
# Function to insert thousands separators into the numeric cells of a
# Stargazer LaTeX table while leaving the LaTeX markup itself untouched
def add_commas_to_stargazer_output(latex_str):
    """Return *latex_str* with its table values reformatted for readability.

    * Decimal numbers get thousands separators and exactly three decimals
      (``12345.6789`` -> ``12,345.679``).
    * Integers with four or more digits get thousands separators only
      (``12345`` -> ``12,345``); shorter integers are left as they are.

    Numbers that belong to the LaTeX markup are not touched: anything
    directly after ``{``, ``[``, ``^``, a backslash, a word character or a
    dot, and anything directly followed by a word character or a dot. That
    protects ``\\multicolumn{5}``, ``\\cline{2-6}``, ``[-1.8ex]``, ``{5pt}``,
    ``$R^2$`` and the ``(1)``, ``(2)`` column numbers. The previous pattern
    rewrote every number in the string and produced invalid LaTeX such as
    ``\\multicolumn{5.000}``.

    Note that any free-standing 4+ digit integer in the text (for example a
    year in a caption) also receives a separator.
    """
    import re

    number = re.compile(r"(?<![\w.\[{^\\])-?(?:\d+\.\d+|\d{4,})(?![\w.])")

    def _reformat(match):
        token = match.group()
        if "." in token:
            return f"{float(token):,.3f}"
        return f"{int(token):,}"

    return number.sub(_reformat, latex_str)

# Render the Stargazer table and keep an untouched copy on disk so the
# effect of the number reformatting can be compared afterwards.
raw_latex = stargazer.render_latex()
with open("regression_results_before_format.tex", "w") as raw_file:
    raw_file.write(raw_latex)

# Reformat the numbers, then write the final version of the table.
latex_code = add_commas_to_stargazer_output(raw_latex)
with open("regression_results.tex", "w") as formatted_file:
    formatted_file.write(latex_code)


# Variables reported in the summary-statistics table (display labels).
sumcol = ["social welfare loss", "gross social production value","population","year", "harvest limit","natural forest","manager tenure experience",'manager education level',"manager age"]


summary = df[sumcol].describe().transpose()

# Keep count / mean / std / min / max only.
summary.drop(['25%','50%','75%'],axis=1,inplace=True)


def _format_whole_number(value):
    """Render a rounded statistic as an integer with thousands separators.

    Non-numeric values pass through unchanged. A missing statistic becomes an
    empty cell, where the original int(x) call would raise on NaN.
    """
    if not isinstance(value, (int, float)):
        return value
    if pd.isna(value):
        return ""
    return f"{int(value):,}"


# Round to whole numbers, then format cell by cell. Series.map is applied per
# column in place of the deprecated DataFrame.applymap.
formatted_summary = summary.round(0).apply(
    lambda column: column.map(_format_whole_number)
)

latex_code = formatted_summary.to_latex(
    index=True,
    # Emit cell text verbatim; the labels in `sumcol` and the formatted
    # numbers contain no LaTeX special characters.
    escape=False,
    caption="Summary Statistics",
    label="tab:summary_statistics",
    # index=True: one "l" for the variable names plus one "r" per statistic.
    column_format="l" + "r" * len(summary.columns)
)


# Save LaTeX code to a file
with open("summary_statistics.tex", "w") as f:
    f.write(latex_code)

    




